# ------------------------------------------------------------------------------
# Replication Materials
# 
# title: Eliciting Beliefs as Distributions in Online Surveys
# journal: Political Analysis
# authors: Lucas Leemann, Richard Traunmüller, and Lukas Stoetzer
# date: August 2020
# ------------------------------------------------------------------------------


  library(tidyverse)
  library(xtable)
  options(xtable.comment = FALSE)
  source("fun/fun_electbeleifs_MLE.R")
  source("fun/fun_utilities.R")

  
  

  source("00_DataCleaner_symmetric.R") # Richard Code to clean Data
  dat_sym <- filter(data) # Filter with screen questions

  source("00_DataCleaner_asymmetric.R") # Richard Code to clean Data
  dat_asym <- filter(data) # Filter with screen questions

  source("00_DataCleaner_symmetric_variance.R") # Richard Code to clean Data
  dat_sym_var <- filter(data) # Filter with screen questions

  source("00_DataCleaner_asymmetric_variance.R") # Richard Code to clean Data
  dat_asym_var <- filter(data) # Filter with screen questions

  source("00_DataCleaner_nachtrag_sym.R") # Richard Code to clean Data
  dat_nach_sym <- filter(data) # Filter with screen questions

  source("00_DataCleaner_nachtrag_asym.R") # Richard Code to clean Data
  dat_nach_asym <- filter(data) # Filter with screen questions

# Estimation  ============

  res <- list() # Result List

#+ Quantile, echo=FALSE, include=FALSE
## Quantile Question  ============

  # Estimate Model for Quantile Question Symmetric, Low Variance
  res[["sym_lvar_quant"]] <- dat_sym %>%
    creat_dat_Y(method="Quantile") %>%
    est_eB_beta(true.val = c(60,60))

  # Estimate Model for Quantile Question Asymmetric, Low Variance
  res[["asym_lvar_quant"]]  <- dat_asym %>%
    creat_dat_Y(method="Quantile") %>%
    est_eB_beta(true.val = c(60,30))

  # Estimate Model for Quantile Question Symmetric, High Variance
  res[["sym_hvar_quant"]] <- dat_sym_var %>%
    creat_dat_Y(method="Quantile") %>%
    est_eB_beta(true.val = c(30,30))

  # Estimate Model for Quantile Question Asymmetric Variance, High Variance
  res[["asym_hvar_quant"]] <- dat_asym_var %>%
    creat_dat_Y(method="Quantile") %>%
    est_eB_beta(true.val = c(30,15))

#+ Quantile2, echo=FALSE, include=FALSE
## Quantile Question (without correction)  ============

  # # Estimate Model for Quantile Question Symmetric, Low Variance
  # res[["sym_lvar_qtnsc"]] <- dat_nach_sym %>%
  #   creat_dat_Y(method="QuantileNoCorrect") %>%
  #   est_eB_beta(true.val = c(60,60))
  # 
  # # Estimate Model for Quantile Question Asymmetric, Low Variance
  # res[["asym_lvar_qtnsc"]]  <- dat_nach_asym %>%
  #   creat_dat_Y(method="Quantile") %>%
  #   est_eB_beta(true.val = c(60,30))

## Intervall Question (Wide) ============

  # Estimate Model for Quantile Question Symmetric, low variance
  res[["sym_lvar_intwi"]]  <- dat_sym %>%
    creat_dat_Y("Interval") %>%
    est_eB_beta_int(cut.offs = c(0.40,0.60), true.val = c(60,60))

  # Estimate Model for Quantile Question Aaymmetric, low Variance
  res[["asym_lvar_intwi"]]  <- dat_asym %>%
    creat_dat_Y("Interval") %>%
    est_eB_beta_int(cut.offs = c(0.40,0.60), true.val = c(60,30))

  # Estimate Model for Quantile Question Symmetric, high variance
  res[["sym_hvar_intwi"]]  <- dat_sym_var %>%
    creat_dat_Y("Interval") %>%
    est_eB_beta_int(cut.offs = c(0.40,0.60), true.val = c(30,30))

  # Estimate Model for Quantile Question Symmetric, high variance
  res[["asym_hvar_intwi"]]  <- dat_asym_var %>%
    creat_dat_Y("Interval") %>%
    est_eB_beta_int(cut.offs = c(0.40,0.60), true.val = c(30,15))

#+ Intervall2, echo=FALSE, include=FALSE
## Intervall Question (Narrow) ============

  # Estimate Model for Quantile Question Symmetric, low variance
  res[["sym_lvar_intna"]]  <- dat_sym %>%
    creat_dat_Y("Interval2") %>%
    est_eB_beta_int(cut.offs = c(0.45,0.55), true.val = c(60,30))

  # Estimate Model for Quantile Question Symmetric
  res[["asym_lvar_intna"]]  <- dat_asym %>%
    creat_dat_Y("Interval2") %>%
    est_eB_beta_int(cut.offs = c(0.45,0.55), true.val = c(60,30))

  # Estimate Model for Quantile Question Symmetric, high variance
  res[["sym_hvar_intna"]]  <- dat_sym_var %>%
    creat_dat_Y("Interval2") %>%
    est_eB_beta_int(cut.offs = c(0.45,0.55), true.val = c(30,30))

  # Estimate Model for Quantile Question Symmetric, high variance
  res[["asym_hvar_intna"]]  <- dat_asym_var %>%
    creat_dat_Y("Interval2") %>%
    est_eB_beta_int(cut.offs = c(0.45,0.55), true.val = c(30,15))

#+ Manski, echo=FALSE, include=TRUE
## Manski Question  ============

  # Estimate Model for Quantile Question Symmetric, low Variance
  res[["sym_lvar_manski"]]  <- dat_sym %>%
    creat_dat_Y("Manski") %>%
    est_eB_beta_manski(true.val = c(60,60))

  Y <- dat_sym %>%
    creat_dat_Y("Manski")


  # Estimate Model for Quantile Question Asymmetric, low Variance
  res[["asym_lvar_manski"]]  <- dat_asym %>%
    creat_dat_Y("Manski") %>%
    est_eB_beta_manski(true.val = c(60,30))

  # Estimate Model for Quantile Question Symmetric, low Variance
  res[["sym_hvar_manski"]]  <- dat_sym_var %>%
    creat_dat_Y("Manski") %>%
    est_eB_beta_manski(true.val = c(30,30))

  # Estimate Model for Quantile Question Asymmetric, low Variance
  res[["asym_hvar_manski"]]  <- dat_asym_var %>%
    creat_dat_Y("Manski") %>%
    est_eB_beta_manski(true.val = c(30,15))

#+ BinsBalls, echo=FALSE, include=FALSE
## Bins and Balls Question  ============

  # Estimate Model for Quantile Question Symmetric, low Variance
  res[["sym_lvar_binba"]]  <- dat_sym %>%
    creat_dat_Y("BinsBalls") %>%
    est_eB_beta_binsballs(true.val = c(60,60))

  # Estimate Model for Quantile Question Symmetric, low Variance
  res[["asym_lvar_binba"]]  <- dat_asym %>%
    creat_dat_Y("BinsBalls") %>%
    est_eB_beta_binsballs(true.val = c(60,30))

  # Estimate Model for Quantile Question Symmetric, high Variance
  res[["sym_hvar_binba"]]  <- dat_sym_var %>%
    creat_dat_Y("BinsBalls") %>%
    est_eB_beta_binsballs(true.val = c(30,30))

  # Estimate Model for Quantile Question Symmetric, high Variance
  res[["asym_hvar_binba"]]  <- dat_asym_var %>%
    creat_dat_Y("BinsBalls") %>%
    est_eB_beta_binsballs(true.val = c(30,15))

#+ Figure, echo=FALSE, include=TRUE, fig.cap = "Comparsion of average elecited beliefs and true data distribution"
## Figure Average Estimate  ============

  # Get estimates
  par_est <- data.frame(t(sapply(sapply(res, "[[", "par"),
                                 function(res) res[names(res) %in% c("alpha","beta")])))

  # Name estimates  & create Factors
  par_est <- par_est %>%
    mutate(type=names(res)) %>%
    separate("type",c("type","var","method")) %>%
    mutate(type = factor(type, levels = c("sym","asym"), labels = c("Symmetric","Asymetric")),
           var = factor(var, levels = c("lvar","hvar"),labels = c("Small Variance","Large Variance")),
           method = factor(method, labels = c("Bins and Balls","Interval (Wide)",
                                              "Interval (Narrow)", "Manski", "Quantile")),
           N = sapply(res, "[[", "n"))

  # Expand Data.frame
  expand.grid.df <- function(...) Reduce(function(...) merge(..., by=NULL), list(...))
  df_est <- expand.grid.df(data.frame("x"=seq(0.2,0.8,0.01)), par_est)

  # Prepare Data for Plot
  df_est       <- mutate(df_est, db = dbeta(x,alpha,beta))

  
  df_true <- data.frame(expand.grid("type"=c("Symmetric","Asymetric"),
                        "var"=c("Large Variance","Small Variance")) %>%
             mutate(alpha_true = c(30,30,60,60),
                    beta_true = c(30,15,60,30)))

  # Doesn't work here.
  df_est <- left_join(df_est,df_true, by=c("type","var")) %>%
     mutate(db_true = dbeta(x,alpha_true,beta_true)) %>%
      select(-alpha_true, -beta_true) %>%
      gather("db","db_true",key=Scenario, value=val) %>%
      mutate(Scenario = factor(ifelse(Scenario=="db","Elicited Beliefs","Data Distribution")),
             Scenario = relevel(Scenario, "Elicited Beliefs"))

  # Plot Data
  ggplot(df_est) +
    geom_line(aes(x=x,y=val,linetype=Scenario)) +
    facet_grid(method~ type+var) +
    scale_color_grey() +
    theme_minimal() +
    xlab("Vote Share") + ylab("Density") +
    theme(axis.title.y=element_blank(),
          text = element_text(size=20),
          strip.text.y = element_text(angle = 360),
          legend.position = "bottom",
          axis.text.y=element_blank(),
          axis.ticks.y=element_blank(),
          panel.background = element_rect(fill = "grey96",
                             colour = "grey96",
                             size = 0, linetype = "solid"),
          panel.grid.major = element_blank(),
          panel.grid.minor = element_blank()
    )

  ggsave("out/fig/Fig_A2.pdf",width = 12,height = 12)

#+ table, echo=FALSE, include=TRUE, results="asis"
## Estimates Table  ============

  # Prepare Table for symmetric Results
  est_tab<- left_join(par_est, df_true)  %>%
                 mutate(lik = sapply(res, "[[", "log_lik"),
                        lr = sapply(res, "[[", "lr")) %>%
                 rowwise() %>%
                 mutate(KL = KL_dis_beta(c(alpha_true,beta_true), as.numeric(c(alpha,beta)))) %>%
                 select(c("method","type","var","alpha","beta","KL","lik","lr","N"))

  ls_est <- split(est_tab, list(est_tab$type, est_tab$var))
  nam_l <- c("A6_c","A6_d","A6_a","A6_b")
  for(nam in names(ls_est)){
    scena <- which(nam == names(ls_est))
    dat_tab <- ls_est[[nam]] %>% arrange(KL) %>%
          select(c("method","alpha","beta","KL","lr","N"))
    types <- strsplit(nam,"[.]")[[1]]
    text <- paste("Estimates for", types[1],"Distribution with",types[2],sep=" ")
    print(xtable(dat_tab, caption=text), floating = FALSE,booktabs = T, include.rownames=FALSE,
          #file=paste("out/fig/Tab_ElecitedBeliefs_",str_replace(nam," ",""),"_MLE_nscren.tex",sep=""))
          file=paste("out/fig/Tab_",str_replace(nam_l[scena]," ",""),".tex",sep=""))

  }

